# -*- coding: utf-8 -*-
import json
import logging

import math
import os
import threading
import time
import requests
from traceback import format_exc
from datetime import datetime

from pymongo import UpdateOne
from pyquery import PyQuery
from requests import TooManyRedirects
from scrapy.utils.project import get_project_settings
from zc_core.client.mongo_client import Mongo
from zc_core.middlewares.proxies.cached_pool import CachedProxyPool
from zc_core.model.items import Order
from zc_core.util import file_reader
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.common import parse_time
from zc_core.util.encrypt_util import base64_decode
from esgcc.simple.simple_dao import SimpleDao
from esgcc.simple.simple_session import SimpleSession

# Root logger: INFO and above are appended to 'order_list_ios.log'.
# NOTE(review): the log file name does not mention images; presumably it was
# carried over from another script -- confirm it is intended.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    filename='order_list_ios.log',
    filemode='a')
# Mirror every record to the console as well. No datefmt is passed to this
# formatter, so console timestamps use logging's default time format rather
# than the one configured for the file above.
console = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s')
console.setFormatter(formatter)
logging.getLogger().addHandler(console)

# Shared Mongo client; the worker threads below call mongo.bulk_write() on it.
mongo = Mongo()
# Suffix of the target collection name ('imgs_<batch_no>'); hard-coded.
batch_no = '20200930'

# Throttle: a permit is acquired before each worker thread is started and
# released in the worker's run(), so at most 12 workers are in flight.
threadmax = threading.BoundedSemaphore(12)
# Every started worker is appended here by crawl_main_images().
thread_pool = list()


class SkuMain(threading.Thread):
    """Worker thread that records the main-gallery image URLs of one SKU.

    ``run`` downloads ``http://b.esgcc.com.cn/showDetail/<sku_id>``, selects
    ``ul#list_h li img`` and upserts one document per image into the Mongo
    collection ``imgs_<batch_no>``.

    The thread releases one ``threadmax`` permit when ``run`` finishes, so the
    caller must acquire a permit before starting it.
    """

    # Number of times a request is re-sent after a redirect loop.
    MAX_RETRIES = 3

    # NOTE(review): hard-coded session cookies; presumably copied from a
    # logged-in browser session and liable to expire -- confirm, and consider
    # moving them to configuration.
    _COOKIES = {
        'JSESSIONID': '93378D2E5B4136BFFB72BB50BD2EC125',
        '__t_c_k_': 'cd2518ef2e0a455ab36862a6c3cd75c1',
        '__s_f_c_s_': 'ACEC031B61C38550ED236ED5BC1CFC19',
        '__d_s_': 'ACEC031B61C38550ED236ED5BC1CFC19',
    }
    _HEADERS = {
        'Connection': 'keep-alive',
        'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }

    def __init__(self, sku_id):
        """Create a worker for the SKU identified by *sku_id*."""
        threading.Thread.__init__(self)
        self.sku_id = sku_id

    def send_req(self, sku_url, max_retries=MAX_RETRIES):
        """Fetch *sku_url* and return the response body as text.

        A ``TooManyRedirects`` error is retried up to *max_retries* times.
        Returns ``None`` when every attempt ends in a redirect loop; any
        other request error propagates to the caller.
        """
        # A loop (not recursion) keeps the retry bounded and makes sure the
        # body of a successful retry is actually returned to the caller.
        for attempt in range(max_retries + 1):
            try:
                rs = requests.get(
                    url=sku_url,
                    cookies=self._COOKIES,
                    headers=self._HEADERS,
                    timeout=180
                )
                return rs.text
            except TooManyRedirects:
                if attempt < max_retries:
                    logging.info('--> retry send_req: sku_url=%s', sku_url)
        logging.warning('--> give up send_req after %s retries: sku_url=%s', max_retries, sku_url)
        return None

    def run(self):
        """Fetch the SKU page and upsert its main image records.

        Failures are logged and never raised; the ``threadmax`` permit is
        always released on exit.
        """
        try:
            sku_url = 'http://b.esgcc.com.cn/showDetail/{}'.format(self.sku_id)
            txt = self.send_req(sku_url)
            if not txt:
                # Nothing to parse (request gave up or the body was empty).
                logging.warning('main  : no content, sku=%s', self.sku_id)
                return

            bulk_list = list()
            imgs = PyQuery(txt)('ul#list_h li img')
            # idx is 1-based and is part of both the document id and file name.
            for idx, img in enumerate(imgs.items(), start=1):
                _id = 'main_{}_{}'.format(self.sku_id, idx)
                bulk_list.append(UpdateOne({'_id': _id}, {'$set': {
                    '_id': _id,
                    'idx': idx,
                    'sku': self.sku_id,
                    'img_url': img.attr('src'),
                    'img_name': '{}-{}.jpg'.format(self.sku_id, idx),
                    'img_type': 'main',
                }}, upsert=True))

            if bulk_list:
                mongo.bulk_write(collection='imgs_{}'.format(batch_no), bulk_list=bulk_list)
                logging.info('main  : sku=%s, img=%s', self.sku_id, len(bulk_list))
        except Exception:
            # Best effort per SKU: record the real error with its traceback
            # instead of a misleading "retry" message.
            logging.exception('--> main failed: sku=%s', self.sku_id)
        finally:
            threadmax.release()


class SkuDetail(threading.Thread):
    """Worker thread that records the detail-section image URLs of one SKU.

    ``run`` downloads
    ``http://b.esgcc.com.cn/products/loadProductDetailInfomationAll?productId=<sku_id>``,
    selects ``div.detail_content img`` and upserts one document per image
    into the Mongo collection ``imgs_<batch_no>``.

    The thread releases one ``threadmax`` permit when ``run`` finishes, so the
    caller must acquire a permit before starting it.
    """

    # Number of times a request is re-sent after a redirect loop.
    MAX_RETRIES = 3

    # NOTE(review): hard-coded session cookies; presumably copied from a
    # logged-in browser session and liable to expire -- confirm, and consider
    # moving them to configuration.
    _COOKIES = {
        'JSESSIONID': '93378D2E5B4136BFFB72BB50BD2EC125',
        '__t_c_k_': 'cd2518ef2e0a455ab36862a6c3cd75c1',
        '__s_f_c_s_': 'ACEC031B61C38550ED236ED5BC1CFC19',
        '__d_s_': 'ACEC031B61C38550ED236ED5BC1CFC19',
    }
    _HEADERS = {
        'Connection': 'keep-alive',
        'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }

    def __init__(self, sku_id):
        """Create a worker for the SKU identified by *sku_id*."""
        threading.Thread.__init__(self)
        self.sku_id = sku_id

    def send_req(self, sku_url, max_retries=MAX_RETRIES):
        """Fetch *sku_url* and return the response body as text.

        A ``TooManyRedirects`` error is retried up to *max_retries* times.
        Returns ``None`` when every attempt ends in a redirect loop; any
        other request error propagates to the caller.
        """
        # Bounded loop so the "retry" that gets logged really happens and the
        # body of a successful retry is returned to the caller.
        for attempt in range(max_retries + 1):
            try:
                rs = requests.get(
                    url=sku_url,
                    cookies=self._COOKIES,
                    headers=self._HEADERS,
                    timeout=180
                )
                return rs.text
            except TooManyRedirects:
                if attempt < max_retries:
                    logging.info('--> retry send_req: sku_url=%s', sku_url)
        logging.warning('--> give up send_req after %s retries: sku_url=%s', max_retries, sku_url)
        return None

    def run(self):
        """Fetch the SKU detail fragment and upsert its image records.

        Failures are logged and never raised; the ``threadmax`` permit is
        always released on exit.
        """
        try:
            sku_url_tpl = 'http://b.esgcc.com.cn/products/loadProductDetailInfomationAll?productId={}'
            sku_url = sku_url_tpl.format(self.sku_id)
            txt = self.send_req(sku_url)
            if not txt:
                # Nothing to parse (request gave up or the body was empty).
                logging.warning('detail: no content, sku=%s', self.sku_id)
                return

            bulk_list = list()
            imgs = PyQuery(txt)('div.detail_content img')
            # idx is 1-based and is part of both the document id and file name.
            for idx, img in enumerate(imgs.items(), start=1):
                # Fall back to 'data-original' when 'src' is missing or empty.
                img_url = img.attr('src') or img.attr('data-original')
                _id = 'detail_{}_{}'.format(self.sku_id, idx)
                bulk_list.append(UpdateOne({'_id': _id}, {'$set': {
                    '_id': _id,
                    'sku': self.sku_id,
                    'idx': idx,
                    'img_url': img_url,
                    'img_name': '{}-{}.jpg'.format(self.sku_id, idx),
                    'img_type': 'detail',
                }}, upsert=True))

            if bulk_list:
                mongo.bulk_write(collection='imgs_{}'.format(batch_no), bulk_list=bulk_list)
                logging.info('detail: sku=%s, img=%s', self.sku_id, len(bulk_list))
        except Exception:
            # Best effort per SKU: record the real error with its traceback
            # instead of a misleading "retry" message.
            logging.exception('--> detail failed: sku=%s', self.sku_id)
        finally:
            threadmax.release()


def crawl_main_images():
    """Crawl main and detail images for every SKU in '../doc/sku_list.txt'.

    For each SKU id one ``SkuMain`` and one ``SkuDetail`` worker thread is
    started. A ``threadmax`` permit is acquired before each start and is
    released by the worker itself when it finishes, which bounds the number
    of workers in flight. The function returns once every worker it started
    has finished.
    """
    sku_list = file_reader.read_rows('../doc/sku_list.txt')
    started = []
    for sku_id in sku_list:
        for worker_cls in (SkuMain, SkuDetail):
            threadmax.acquire()
            try:
                worker = worker_cls(sku_id)
                worker.start()
            except Exception:
                # run() never executed, so nobody else will hand this permit
                # back; release it here to avoid shrinking the pool.
                threadmax.release()
                raise
            thread_pool.append(worker)
            started.append(worker)

    # Wait for the workers started by this call to finish.
    for worker in started:
        worker.join()


if __name__ == '__main__':
    crawl_main_images()
